# load("./data/Robjects/Annotated_Results_LvV.RData")
# load("./data/Robjects/Ensembl_annotations.RData")
# load("./data/Robjects/DE.RData")
# load("./data/Robjects/01_DDS.RData")
# load("./data/Robjects/02_annot.RData")
load("./data/Robjects/03_DDS.RData")

select genes to analyse

for now only using the gastroc tissue

# res_sorted <- data.frame(res.gastroc, stringsAsFactors = F) %>%
#   na.omit() %>%
#   subset(., (log2FoldChange >= 1 |
#                log2FoldChange <= -1)) %>% # set arbitrarily
#   arrange(desc(abs(log2FoldChange))) # order by fold change (absolute value)
# # up-regulated genes
# up <- na.omit(res_sorted) %>%
#   subset(.,  log2FoldChange > 0 & padj < 0.01)
res_sorted<-data.frame(gene = rownames(res.gastroc),res.gastroc,stringsAsFactors = F)%>%
  na.omit()%>%
  subset(., ( log2FoldChange >=1 | log2FoldChange <=-1))%>% # set abritarily
  arrange(desc( abs(log2FoldChange) )) # order by fold change (absolute value)
# up-regulated genes
up<-data.frame(rn= rownames(res_sorted),res_sorted) %>%
  na.omit()%>%
  subset(., log2FoldChange > 0 & padj < 0.05)
gp_up <- gost(row.names(up), organism = "mmusculus", evcodes = TRUE, ordered_query = TRUE,
            correction_method = "gSCS", user_threshold = 0.01, 
            sources = c("GO:BP", "GO:MF", "GO:CC", "KEGG", "REAC", "TF", "MIRNA", "CORUM", "HP", "HPA", "WP")) # ordered by foldchange; evcodes generates gene names (makes run time longer though)

Result visualization

require(enrichplot)
require(ggplot2)
require(viridis)
require(shiny)
require(DOSE)

div(gostplot(gp_up, interactive = TRUE), align = "right") # interactive plot; circle sizes are in accordance with corresponding term size; term location on the x-axis is fixed and terms from the same GO subtree are located closer to each other

# define as enrichResult object and plot
gp_up_df<-gp_up$result[,c("query", "source", "term_id", "term_name",
                          "p_value", "query_size", "intersection_size",
                          "term_size", "effective_domain_size", "intersection")]%>%
  mutate(intersection = gsub(",", "/", intersection),
         GeneRatio = paste0(intersection_size, "/", query_size),
         BgRatio = paste0(term_size, "/", effective_domain_size))
names(gp_up_df) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size", "geneID", "GeneRatio", "BgRatio")
row.names(gp_up_df) = gp_up_df$ID
#gp_up_df<-subset(gp_up_df, Category == "REAC") # can prefilter for databases of interest

# dotplot
enrichplot::dotplot(new("enrichResult", result = gp_up_df))+
  theme_minimal()+
  scale_color_viridis()+
  theme(axis.title=element_text(size=rel(1.5), colour = "black"),
        axis.line=element_line(colour = "grey40"),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.ticks.x=(element_line(size=0.5, colour = "grey40")))

# barplot
 barplot(new("enrichResult", result = gp_up_df), showCategory = 10)+
   ggplot2::facet_grid(~Cluster) +
   ggplot2::ylab("Intersection size")+
   theme_minimal()+
   scale_fill_viridis()+
   theme(axis.title=element_text(size=rel(1.5), colour = "black"),
        axis.line=element_line(colour = "grey40"),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.ticks.x=(element_line(size=0.5, colour = "grey40")))

# terms and gene names
enrichplot::heatplot(new("enrichResult", result = gp_up_df), showCategory = 10) +
  theme_minimal() +
  theme(
    axis.title = element_text(size = rel(1.5), colour = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(colour = "grey40"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks.x = (element_line(size = 0.5, colour = "grey40"))
  )

# terms and gene names 2
enrichplot::cnetplot(
  new("enrichResult", result = gp_up_df),
  showCategory = 3,
  cex_label_gene = 0.5
) +
  scale_colour_manual(values = c("grey70", "black"), guide = FALSE)

# terms and gene names 3
enrichplot::cnetplot(
  new("enrichResult", result = gp_up_df),
  showCategory = 3,
  circular = TRUE,
  colorEdge = TRUE,
  cex_label_gene = 0.5
)

topGO

library(topGO)
library(org.Hs.eg.db)
# named vector of up-regulated genes
temp <- data.frame(rn = rownames(res_sorted), res_sorted) %>%
  na.omit() %>%
  mutate(up = ifelse(log2FoldChange > 0, padj, 1)) #set the p-value for down regulations to 1
# make a named vector
res_sorted_up <- temp$up
names(res_sorted_up) <- temp$rn
rm(temp)
# function for enrichment with p 0.05
topDiffGenes <- function(x) {
  return(x < 0.05)
}
### run
tg.up<-new("topGOdata",#just a name
           description="res_sorted_up",
           ontology="BP",
           allGenes=res_sorted_up[1:100], #named vector with adj. p-values
           geneSel=topDiffGenes,
           annot=annFUN.org,
           nodeSize = 10, #minimum number of genes in a GO category
           ID="Ensembl",
           mapping="org.Hs.eg.db")
Error in split.default(names(sort(nl)), f.index) : 
  first argument must be a vector

Reactome

“Reactome is a free, open-source, curated and peer-reviewed pathway database.” ReactomePA is a useful package that allows you to use Reactome database and visualize your results.

Finding significant Reactome pathways from gene list

#Make a data frame with gene names and corresponding ENTREZ IDs.
ensgEntrez_mutant <- AnnotationDbi::select(org.Mm.eg.db, keys=names(mutant_up), columns='ENSEMBLID', keytype='SYMBOL')
Error in .testForValidCols(x, cols) : 
  Invalid columns: ENSEMBLID. Please use the columns method to see a listing of valid arguments.

Visualizing Reactome results with ReactomePA

#You can save the plot as png but set the resolution (widthxheight).

png(file="ReactomeDot_Mutant_Control_Up.png", width=1440, height=1280)
dotplot(x_mutant_up, showCategory=30, font.size=15, label_format = 10) + ggtitle("Mutant vs Control Upregulated") + theme(plot.title = element_text(size = 30, face = "bold"))
dev.off()

#For the next one, we need to add Log2FoldChanges too. Make a named vector just like we did with p-adjusted values before.
lfc <-df_mutant$lfc
names(lfc)<-df_mutant$ENTREZID


#Make and save the plot
png(file="CNET_Mutant_Control_Up.png", width=1440, height=1440)
cnetplot(x_mutant_up, showCategory =10, foldChange=2^lfc,cex_label_gene = 1.2, cex_label_category = 1.8) + ggtitle("Mutant vs Control Upregulated") + theme(plot.title = element_text(size = 40, face = "bold"))
dev.off()
save(gp_up, tg.up, file = "./data/Robjects/05_enrichment.RData")
load("./data/Robjects/05_enrichment.RData")
---
title: "05 enrichment analysis"
author: "Nick Diercksen"
output:
  html_notebook:
    toc: yes
    toc_float: yes
editor_options: 
  chunk_output_type: inline
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
# library(ggplot2)
# library(fgsea)
library(gprofiler2)
library(DESeq2)
```



```{r}
# load("./data/Robjects/Annotated_Results_LvV.RData")
# load("./data/Robjects/Ensembl_annotations.RData")
# load("./data/Robjects/DE.RData")
# load("./data/Robjects/01_DDS.RData")
# load("./data/Robjects/02_annot.RData")
load("./data/Robjects/03_DDS.RData")
```


# select genes to analyse
for now only using the gastroc tissue
```{r}
# res_sorted <- data.frame(res.gastroc, stringsAsFactors = F) %>%
#   na.omit() %>%
#   subset(., (log2FoldChange >= 1 |
#                log2FoldChange <= -1)) %>% # set arbitrarily
#   arrange(desc(abs(log2FoldChange))) # order by fold change (absolute value)
# # up-regulated genes
# up <- na.omit(res_sorted) %>%
#   subset(.,  log2FoldChange > 0 & padj < 0.01)
res_sorted<-data.frame(gene = rownames(res.gastroc),res.gastroc,stringsAsFactors = F)%>%
  na.omit()%>%
  subset(., ( log2FoldChange >=1 | log2FoldChange <=-1))%>% # set abritarily
  arrange(desc( abs(log2FoldChange) )) # order by fold change (absolute value)
# up-regulated genes
up<-data.frame(rn= rownames(res_sorted),res_sorted) %>%
  na.omit()%>%
  subset(., log2FoldChange > 0 & padj < 0.05)

```

```{r}
gp_up <- gost(row.names(up), organism = "mmusculus", evcodes = TRUE, ordered_query = TRUE,
            correction_method = "gSCS", user_threshold = 0.01, 
            sources = c("GO:BP", "GO:MF", "GO:CC", "KEGG", "REAC", "TF", "MIRNA", "CORUM", "HP", "HPA", "WP")) # ordered by foldchange; evcodes generates gene names (makes run time longer though)
```


# Result visualization

```{r echo=T, fig.align="center", warning=FALSE, message=FALSE}
library(enrichplot)
library(ggplot2)
library(viridis)
library(shiny)
library(DOSE)

div(gostplot(gp_up, interactive = TRUE), align = "right") # interactive plot; circle sizes are in accordance with corresponding term size; term location on the x-axis is fixed and terms from the same GO subtree are located closer to each other

# define as enrichResult object and plot
gp_up_df<-gp_up$result[,c("query", "source", "term_id", "term_name",
                          "p_value", "query_size", "intersection_size",
                          "term_size", "effective_domain_size", "intersection")]%>%
  mutate(intersection = gsub(",", "/", intersection),
         GeneRatio = paste0(intersection_size, "/", query_size),
         BgRatio = paste0(term_size, "/", effective_domain_size))
names(gp_up_df) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size", "geneID", "GeneRatio", "BgRatio")
row.names(gp_up_df) = gp_up_df$ID
#gp_up_df<-subset(gp_up_df, Category == "REAC") # can prefilter for databases of interest

# dotplot
enrichplot::dotplot(new("enrichResult", result = gp_up_df))+
  theme_minimal()+
  scale_color_viridis()+
  theme(axis.title=element_text(size=rel(1.5), colour = "black"),
        axis.line=element_line(colour = "grey40"),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.ticks.x=(element_line(size=0.5, colour = "grey40")))
# barplot
 barplot(new("enrichResult", result = gp_up_df), showCategory = 10)+
   ggplot2::facet_grid(~Cluster) +
   ggplot2::ylab("Intersection size")+
   theme_minimal()+
   scale_fill_viridis()+
   theme(axis.title=element_text(size=rel(1.5), colour = "black"),
        axis.line=element_line(colour = "grey40"),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.ticks.x=(element_line(size=0.5, colour = "grey40")))
```


```{r, echo=T, fig.align="center", fig.width=10, warning=FALSE, message=FALSE}
# terms and gene names
enrichplot::heatplot(new("enrichResult", result = gp_up_df), showCategory = 10) +
  theme_minimal() +
  theme(
    axis.title = element_text(size = rel(1.5), colour = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(colour = "grey40"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks.x = (element_line(size = 0.5, colour = "grey40"))
  )
# terms and gene names 2
enrichplot::cnetplot(
  new("enrichResult", result = gp_up_df),
  showCategory = 3,
  cex_label_gene = 0.5
) +
  scale_colour_manual(values = c("grey70", "black"), guide = FALSE)
# terms and gene names 3
enrichplot::cnetplot(
  new("enrichResult", result = gp_up_df),
  showCategory = 3,
  circular = TRUE,
  colorEdge = TRUE,
  cex_label_gene = 0.5
)

```

# topGO

```{r, echo=T, fig.align="center"}
library(topGO)
library(org.Hs.eg.db)
# named vector of up-regulated genes
temp <- data.frame(rn = rownames(res_sorted), res_sorted) %>%
  na.omit() %>%
  mutate(up = ifelse(log2FoldChange > 0, padj, 1)) #set the p-value for down regulations to 1
# make a named vector
res_sorted_up <- temp$up
names(res_sorted_up) <- temp$rn
rm(temp)
# function for enrichment with p 0.05
topDiffGenes <- function(x) {
  return(x < 0.05)
}
### run
tg.up<-new("topGOdata",#just a name
           description="res_sorted_up",
           ontology="BP",
           allGenes=res_sorted_up[1:100], #named vector with adj. p-values
           geneSel=topDiffGenes,
           annot=annFUN.org,
           nodeSize = 10, #minimum number of genes in a GO category
           ID="Ensembl",
           mapping="org.Hs.eg.db")

# Kolmogorov-Smirnov (KS)-test
results.ks <- runTest(tg.up, algorithm = "weight01", statistic = "ks")
qval <- data.frame(qVal = p.adjust(score(results.ks), method = "BH"),
                   GO.ID = names(score(results.ks)))
# results table
goEnrichment <-
  GenTable(tg.up,
           KS = results.ks,
           orderBy = "KS",
           topNodes = 10)
goEnrichment <- inner_join(goEnrichment, qval, by = "GO.ID")
head(goEnrichment)

```


### Reactome

"Reactome is a free, open-source, curated and peer-reviewed pathway database." ReactomePA is a useful package that allows you to use Reactome database and visualize your results.

### Finding significant Reactome pathways from gene list

```{r}
library(ReactomePA)

#You need to check what type of gene ID the current version of ReactomePA accepts: This can be ENTREZ ID, ENSEMBL ID etc.

#Here we will convert gene symbols (i.e. Lypla1) to ENTREZ ID (i.e. 18777). For this we need the following packages:
library(AnnotationDbi) 

#This example is from mouse data, so:
library(org.Mm.eg.db)

#Here we select the genes that are upregulated in mutant compared to control
res_mutant_ctrl_greater<-results(dds.gastroc, 
                                 contrast= c("genotype","KO","WT"),
                                 altHypothesis = "greater") 

#Add row names as column and omit NA
res_mutant_ctrl_greater<-data.frame(rn= rownames(res_mutant_ctrl_greater),res_mutant_ctrl_greater) %>% na.omit()

#Make a named vector of p-adjusted values. Names are the gene names.
mutant_up <- res_mutant_ctrl_greater$padj 
names(mutant_up) <- res_mutant_ctrl_greater$rn

#Make a data frame with gene names and corresponding ENTREZ IDs.
ensgEntrez_mutant <- AnnotationDbi::select(org.Mm.eg.db, keys=names(mutant_up), columns='ENSEMBLID', keytype='SYMBOL')

#Make a data frame with ENTREZ IDs, Log2FoldChanges and p-adjusted values.
df_mutant <-ensgEntrez_mutant %>%
  dplyr::inner_join(res_mutant_ctrl_greater, by=c("SYMBOL"="rn")) %>% 
  dplyr::select(ENTREZID,log2FoldChange,padj) %>% 
  dplyr::group_by(ENTREZID) %>%
  dplyr::summarise(lfc=mean(log2FoldChange), padj=min(padj))

#Make a list of the ENTREZ IDs of the genes with padj <= 0.1.
sg<-unique((df_mutant %>% dplyr::filter(padj<=0.1))$ENTREZID)

#Search the Reactome database for the pathways which our significant genes are annotated to. 
x_mutant_up <- enrichPathway(gene=sg, organism = "mouse", 
                             universe= unique(df_mutant$ENTREZID), 
                             pvalueCutoff=0.1, pAdjustMethod="BH", readable=T)

#Extract the results into a data frame.
x_mutant_up_df <- x_mutant_up@result
```

### Visualizing Reactome results with ReactomePA

```{r}
#You can save the plot as png but set the resolution (widthxheight).

png(file="ReactomeDot_Mutant_Control_Up.png", width=1440, height=1280)
dotplot(x_mutant_up, showCategory=30, font.size=15, label_format = 10) + ggtitle("Mutant vs Control Upregulated") + theme(plot.title = element_text(size = 30, face = "bold"))
dev.off()

#For the next one, we need to add Log2FoldChanges too. Make a named vector just like we did with p-adjusted values before.
lfc <-df_mutant$lfc
names(lfc)<-df_mutant$ENTREZID


#Make and save the plot
png(file="CNET_Mutant_Control_Up.png", width=1440, height=1440)
cnetplot(x_mutant_up, showCategory =10, foldChange=2^lfc,cex_label_gene = 1.2, cex_label_category = 1.8) + ggtitle("Mutant vs Control Upregulated") + theme(plot.title = element_text(size = 40, face = "bold"))
dev.off()
```



```{r}
save(gp_up, tg.up, file = "./data/Robjects/05_enrichment.RData")
load("./data/Robjects/05_enrichment.RData")
```

